// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/* NOTES:
 * - A default GPU can be compiled in during the build, by defining
 *   CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that
 *   insmod'ing mali_kbase.ko with no arguments after a build with "scons
 *   gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
 *   overridden by passing the 'no_mali_gpu' argument to insmod.
 *
 * - If CONFIG_MALI_BIFROST_ERROR_INJECT is defined, the error injection
 *   system is activated.
 */

/* Implementation of failure injection system:
 *
 * Error conditions are generated by gpu_generate_error().
 * Depending on the configuration, gpu_generate_error() either generates a HW
 * error condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM defined) or checks
 * whether error_track_list holds an error configuration to be set for the
 * current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
 * Each error condition triggers a specific "state" in a certain set of
 * registers, as per the Midgard Architecture Specifications document.
 *
 * According to Midgard Architecture Specifications doc the following registers
 * are always affected by error conditions:
 *
 * JOB Exception:
 *				JOB_IRQ_RAWSTAT
 *				JOB<n> STATUS AREA
 *
 * MMU Exception:
 *				MMU_IRQ_RAWSTAT
 *				AS<n>_FAULTSTATUS
 *				AS<n>_FAULTADDRESS
 *
 * GPU Exception:
 *				GPU_IRQ_RAWSTAT
 *				GPU_FAULTSTATUS
 *				GPU_FAULTADDRESS
 *
 *	For further clarification on the model's behaviour upon specific
 *	error conditions, refer to the Midgard Architecture Specification
 *	document.
 */
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <backend/gpu/mali_kbase_model_dummy.h>
#include <mali_kbase_mem_linux.h>

#if MALI_USE_CSF
#include <csf/mali_kbase_csf_firmware.h>

/* Index of the last value register for each type of core, with the 1st value
 * register being at index 0.
 */
#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1)

/* Array for storing the value of the SELECT register for each type of core */
static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM];
static bool ipa_control_timer_enabled;
#endif

#define LO_MASK(M) ((M) & 0xFFFFFFFF)
#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
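
/* For example, for M = 0x0000000300000001ULL, LO_MASK(M) == 0x00000001 and
 * HI_MASK(M) == 0x0000000300000000.
 */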

static u32 get_implementation_register(u32 reg)
{
	switch (reg) {
	case GPU_CONTROL_REG(SHADER_PRESENT_LO):
		return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
	case GPU_CONTROL_REG(TILER_PRESENT_LO):
		return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
	case GPU_CONTROL_REG(L2_PRESENT_LO):
		return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
	case GPU_CONTROL_REG(STACK_PRESENT_LO):
		return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);

	case GPU_CONTROL_REG(SHADER_PRESENT_HI):
	case GPU_CONTROL_REG(TILER_PRESENT_HI):
	case GPU_CONTROL_REG(L2_PRESENT_HI):
	case GPU_CONTROL_REG(STACK_PRESENT_HI):
	/* *** FALLTHROUGH *** */
	default:
		return 0;
	}
}

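/* Global performance-counter state for the dummy model, shared by all model
 * instances and guarded by access_lock.
 */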
struct {
	spinlock_t access_lock;
#if !MALI_USE_CSF
	unsigned long prfcnt_base;
#endif /* !MALI_USE_CSF */
	u32 *prfcnt_base_cpu;

	u32 time;

	struct gpu_model_prfcnt_en prfcnt_en;

	u64 l2_present;
	u64 shader_present;

#if !MALI_USE_CSF
	u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#else
	u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#endif /* !MALI_USE_CSF */
	u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
			KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
			    KBASE_DUMMY_MODEL_COUNTER_PER_CORE];

} performance_counters = {
	.l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
	.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
};

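/* Per-slot job state: job_active/job_queued model the current and next job
 * chain, and job_complete_irq_asserted feeds the JOB_IRQ_RAWSTAT bits.
 */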
struct job_slot {
	int job_active;
	int job_queued;
	int job_complete_irq_asserted;
	int job_irq_mask;
	int job_disabled;
};

/**
 * struct control_reg_values_t - control register values specific to the GPU being 'emulated'
 * @name:			GPU name
 * @gpu_id:			GPU ID to report
 * @as_present:			Bitmap of address spaces present
 * @thread_max_threads:		Maximum number of threads per core
 * @thread_max_workgroup_size:	Maximum number of threads per workgroup
 * @thread_max_barrier_size:	Maximum number of threads per barrier
 * @thread_features:		Thread features, NOT INCLUDING the 2
 *				most-significant bits, which are always set to
 *				IMPLEMENTATION_MODEL.
 * @core_features:		Core features
 * @tiler_features:		Tiler features
 * @mmu_features:		MMU features
 * @gpu_features_lo:		GPU features (low)
 * @gpu_features_hi:		GPU features (high)
 */
struct control_reg_values_t {
	const char *name;
	u32 gpu_id;
	u32 as_present;
	u32 thread_max_threads;
	u32 thread_max_workgroup_size;
	u32 thread_max_barrier_size;
	u32 thread_features;
	u32 core_features;
	u32 tiler_features;
	u32 mmu_features;
	u32 gpu_features_lo;
	u32 gpu_features_hi;
};

struct dummy_model_t {
	int reset_completed;
	int reset_completed_mask;
#if !MALI_USE_CSF
	int prfcnt_sample_completed;
#endif /* !MALI_USE_CSF */
	int power_changed_mask;	/* 2bits: _ALL,_SINGLE */
	int power_changed;	/* 1bit */
	bool clean_caches_completed;
	bool clean_caches_completed_irq_enabled;
	int power_on;		/* 6bits: SHADER[4],TILER,L2 */
	u32 stack_power_on_lo;
	u32 coherency_enable;
	unsigned int job_irq_js_state;
	struct job_slot slots[NUM_SLOTS];
	const struct control_reg_values_t *control_reg_values;
	u32 l2_config;
	void *data;
};

void gpu_device_set_data(void *model, void *data)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)model;

	dummy->data = data;
}

void *gpu_device_get_data(void *model)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)model;

	return dummy->data;
}

#define signal_int(m, s) ((m)->slots[(s)].job_complete_irq_asserted = 1)

/* SCons should pass in a default GPU, but other ways of building (e.g.
 * in-tree) won't, so define one here in case.
 */
#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
#endif

static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
module_param(no_mali_gpu, charp, 0000);
MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
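
/* For example, loading the module with:
 *   insmod mali_kbase.ko no_mali_gpu="tGOx_r1p0"
 * selects the tGOx r1p0 control register values. Unrecognised names fall back
 * to the first entry in all_control_reg_values[], with a warning.
 */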

/* Construct a value for the THREAD_FEATURES register, *except* the two most
 * significant bits, which are set to IMPLEMENTATION_MODEL in
 * midgard_model_read_reg().
 */
#if MALI_USE_CSF
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#endif
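
/* For example, THREAD_FEATURES_PARTIAL(0x6000, 4, 10) packs to 0x0a046000 in
 * the job manager layout (max_registers | max_task_queue << 16 |
 * max_thread_group_split << 24) and to 0x04006000 in the CSF layout, where
 * the thread group split field no longer exists.
 */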

/* Array associating GPU names with control register values. The first
 * one is used in the case of no match.
 */
static const struct control_reg_values_t all_control_reg_values[] = {
	{
		.name = "tMIx",
		.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tHEx",
		.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tSIx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDVx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNOx",
		.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r0p0",
		.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r1p0",
		.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.core_features = 0x2,
		.tiler_features = 0x209,
		.mmu_features = 0x2823,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTRx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNAx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBEx",
		.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBAx",
		.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDUx",
		.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tODx",
		.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGRx",
		.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tVAx",
		.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTUx",
		.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x800,
		.thread_max_workgroup_size = 0x400,
		.thread_max_barrier_size = 0x400,
		.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
		.core_features = 0x0, /* core_1e32fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0xf,
		.gpu_features_hi = 0,
	},
};

struct error_status_t hw_error_status;

#if MALI_USE_CSF
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
				      u32 cnt_idx, bool is_low_word)
{
	u64 *counters_data = NULL;
	u32 core_count = 0;
	u32 event_index;
	u64 value = 0;
	u32 core;
	unsigned long flags;

	if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
		return 0;

	if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))
		return 0;

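	/* Each 64-bit SELECT register packs eight 8-bit event indices, one per
	 * counter, so counter cnt_idx's event index is byte cnt_idx of the
	 * SELECT value.
	 */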
	event_index =
		(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;

	/* Currently only primary counter blocks are supported */
	if (WARN_ON(event_index >= 64))
		return 0;

	/* The actual events start from index 4 onwards. The spec also says the
	 * PRFCNT_EN, TIMESTAMP_LO and TIMESTAMP_HI pseudo-counters do not make
	 * sense for IPA counters. If selected, the value returned for them
	 * will be zero.
	 */
	if (WARN_ON(event_index <= 3))
		return 0;

	event_index -= 4;

	spin_lock_irqsave(&performance_counters.access_lock, flags);

	switch (core_type) {
	case KBASE_IPA_CORE_TYPE_CSHW:
		core_count = 1;
		counters_data = performance_counters.cshw_counters;
		break;
	case KBASE_IPA_CORE_TYPE_MEMSYS:
		core_count = hweight64(performance_counters.l2_present);
		counters_data = performance_counters.l2_counters;
		break;
	case KBASE_IPA_CORE_TYPE_TILER:
		core_count = 1;
		counters_data = performance_counters.tiler_counters;
		break;
	case KBASE_IPA_CORE_TYPE_SHADER:
		core_count = hweight64(performance_counters.shader_present);
		counters_data = performance_counters.shader_counters;
		break;
	default:
		WARN(1, "Invalid core_type %d\n", core_type);
		break;
	}

	for (core = 0; core < core_count; core++) {
		value += counters_data[event_index];
		event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
	}

	spin_unlock_irqrestore(&performance_counters.access_lock, flags);

	if (is_low_word)
		return (value & U32_MAX);
	else
		return (value >> 32);
}
#endif /* MALI_USE_CSF */

/**
 * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values
 *
 * Sets all performance counter values to zero. The performance counter access
 * lock must be held when calling this function.
 */
static void gpu_model_clear_prfcnt_values_nolock(void)
{
	lockdep_assert_held(&performance_counters.access_lock);
#if !MALI_USE_CSF
	memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters));
#else
	memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters));
#endif /* !MALI_USE_CSF */
	memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters));
	memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters));
	memset(performance_counters.shader_counters, 0,
	       sizeof(performance_counters.shader_counters));
}

#if MALI_USE_CSF
void gpu_model_clear_prfcnt_values(void)
{
	unsigned long flags;

	spin_lock_irqsave(&performance_counters.access_lock, flags);
	gpu_model_clear_prfcnt_values_nolock();
	spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}
KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
#endif /* MALI_USE_CSF */

/**
 * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
 *
 * @values:             Array of values to be written out
 * @out_index:          Index into performance counter buffer
 * @block_count:        Number of blocks to dump
 * @prfcnt_enable_mask: Counter enable mask
 * @blocks_present:     Available blocks bit mask
 *
 * The performance counter access lock must be held before calling this
 * function.
 */
static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count,
					 u32 prfcnt_enable_mask, u64 blocks_present)
{
	u32 block_idx, counter;
	u32 counter_value = 0;
	u32 *prfcnt_base;
	u32 index = 0;

	lockdep_assert_held(&performance_counters.access_lock);

	prfcnt_base = performance_counters.prfcnt_base_cpu;

	for (block_idx = 0; block_idx < block_count; block_idx++) {
		/* only dump values if the core is present */
		if (!(blocks_present & (1ull << block_idx))) {
#if MALI_USE_CSF
			/* if CSF dump zeroed out block */
			memset(&prfcnt_base[*out_index], 0,
			       KBASE_DUMMY_MODEL_BLOCK_SIZE);
			*out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK;
#endif /* MALI_USE_CSF */
			continue;
		}

		/* write the header: dword 0 holds a timestamp, dword 2 the
		 * counter enable mask for the block
		 */
		prfcnt_base[*out_index] = performance_counters.time++;
		prfcnt_base[*out_index + 2] = prfcnt_enable_mask;
		*out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;

		/* write the counters */
		for (counter = 0; counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
		     counter++) {
			/* HW counter values retrieved through
			 * PRFCNT_SAMPLE request are of 32 bits only.
			 */
			counter_value = (u32)values[index++];
			if (KBASE_DUMMY_MODEL_COUNTER_ENABLED(
				    prfcnt_enable_mask,
				    (counter + KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) {
				prfcnt_base[*out_index + counter] = counter_value;
			}
		}
		*out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
	}
}

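/* Dump all blocks in the fixed layout order: one front-end (JM or CSHW)
 * block, one tiler block, then the MEMSYS blocks and finally the shader
 * blocks.
 */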
static void gpu_model_dump_nolock(void)
{
	u32 index = 0;

	lockdep_assert_held(&performance_counters.access_lock);

#if !MALI_USE_CSF
	gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1,
				     performance_counters.prfcnt_en.fe, 0x1);
#else
	gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1,
				     performance_counters.prfcnt_en.fe, 0x1);
#endif /* !MALI_USE_CSF */
	gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
				     &index, 1,
				     performance_counters.prfcnt_en.tiler,
				     DUMMY_IMPLEMENTATION_TILER_PRESENT);
	gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index,
				     KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
				     performance_counters.prfcnt_en.l2,
				     performance_counters.l2_present);
	gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters,
				     &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES,
				     performance_counters.prfcnt_en.shader,
				     performance_counters.shader_present);

	/* Counter values are cleared after each dump */
	gpu_model_clear_prfcnt_values_nolock();

	/* simulate a 'long' time between samples */
	performance_counters.time += 10;
}

#if !MALI_USE_CSF
static void midgard_model_dump_prfcnt(void)
{
	unsigned long flags;

	spin_lock_irqsave(&performance_counters.access_lock, flags);
	gpu_model_dump_nolock();
	spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}
#else
void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps)
{
	unsigned long flags;

	if (WARN_ON(!sample_buf))
		return;

	spin_lock_irqsave(&performance_counters.access_lock, flags);
	performance_counters.prfcnt_base_cpu = sample_buf;
	performance_counters.prfcnt_en = enable_maps;
	gpu_model_dump_nolock();
	spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}

void gpu_model_glb_request_job_irq(void *model)
{
	unsigned long flags;

	spin_lock_irqsave(&hw_error_status.access_lock, flags);
	hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
	spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
	gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ);
}
#endif /* !MALI_USE_CSF */

static void init_register_statuses(struct dummy_model_t *dummy)
{
	int i;

	hw_error_status.errors_mask = 0;
	hw_error_status.gpu_error_irq = 0;
	hw_error_status.gpu_fault_status = 0;
	hw_error_status.job_irq_rawstat = 0;
	hw_error_status.job_irq_status = 0;
	hw_error_status.mmu_irq_rawstat = 0;
	hw_error_status.mmu_irq_mask = 0;

	for (i = 0; i < NUM_SLOTS; i++) {
		hw_error_status.js_status[i] = 0;
		hw_error_status.job_irq_rawstat |=
			(dummy->slots[i].job_complete_irq_asserted) << i;
		hw_error_status.job_irq_status |=
			(dummy->slots[i].job_complete_irq_asserted) << i;
	}
	for (i = 0; i < NUM_MMU_AS; i++) {
		hw_error_status.as_command[i] = 0;
		hw_error_status.as_faultstatus[i] = 0;
		hw_error_status.mmu_irq_mask |= 1 << i;
	}

	performance_counters.time = 0;
}

static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
{
	lockdep_assert_held(&hw_error_status.access_lock);

	if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
		if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
			if (hw_error_status.js_status[job_slot] == 0) {
				/* status reg is clean; it can be written */

				switch (hw_error_status.errors_mask &
							IS_A_JOB_ERROR) {
				case KBASE_JOB_INTERRUPTED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INTERRUPTED;
					break;

				case KBASE_JOB_STOPPED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_STOPPED;
					break;

				case KBASE_JOB_TERMINATED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TERMINATED;
					break;

				case KBASE_JOB_CONFIG_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_CONFIG_FAULT;
					break;

				case KBASE_JOB_POWER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_POWER_FAULT;
					break;

				case KBASE_JOB_READ_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_READ_FAULT;
					break;

				case KBASE_JOB_WRITE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_WRITE_FAULT;
					break;

				case KBASE_JOB_AFFINITY_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_AFFINITY_FAULT;
					break;

				case KBASE_JOB_BUS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_BUS_FAULT;
					break;

				case KBASE_INSTR_INVALID_PC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_PC;
					break;

				case KBASE_INSTR_INVALID_ENC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_ENC;
					break;

				case KBASE_INSTR_TYPE_MISMATCH:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TYPE_MISMATCH;
					break;

				case KBASE_INSTR_OPERAND_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_OPERAND_FAULT;
					break;

				case KBASE_INSTR_TLS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TLS_FAULT;
					break;

				case KBASE_INSTR_BARRIER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_BARRIER_FAULT;
					break;

				case KBASE_INSTR_ALIGN_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_ALIGN_FAULT;
					break;

				case KBASE_DATA_INVALID_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_DATA_INVALID_FAULT;
					break;

				case KBASE_TILE_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TILE_RANGE_FAULT;
					break;

				case KBASE_ADDR_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_ADDRESS_RANGE_FAULT;
					break;

				case KBASE_OUT_OF_MEMORY:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_OUT_OF_MEMORY;
					break;

				case KBASE_UNKNOWN:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_UNKNOWN;
					break;

				default:
					model_error_log(KBASE_CORE,
					"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
					break;
				}
			}
#endif /* !MALI_USE_CSF */

			/* we set JOB_FAIL_<n> */
			hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot + 16);
			hw_error_status.job_irq_status |=
			(((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
					(dummy->slots[job_slot].job_irq_mask <<
							job_slot)) << 16;
		} else {
			hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								job_slot;
			hw_error_status.job_irq_status |=
			((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
					(dummy->slots[job_slot].job_irq_mask <<
								job_slot);
		}
	} else {
		hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								job_slot;
		hw_error_status.job_irq_status |=
			((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
			(dummy->slots[job_slot].job_irq_mask << job_slot);
	}			/* end of job register statuses */

	if (hw_error_status.errors_mask & IS_A_MMU_ERROR) {
		int i;

		for (i = 0; i < NUM_MMU_AS; i++) {
			if (i == hw_error_status.faulty_mmu_as) {
				if (hw_error_status.as_faultstatus[i] == 0) {
					u32 status =
					hw_error_status.as_faultstatus[i];
					/* status reg is clean; it can be
					 * written
					 */
					switch (hw_error_status.errors_mask &
							IS_A_MMU_ERROR) {
					case KBASE_TRANSLATION_FAULT:
						/* 0xCm means TRANSLATION FAULT
						 * (m is mmu_table_level)
						 */
						status =
							((1 << 7) | (1 << 6) |
						hw_error_status.mmu_table_level
									);
						break;

					case KBASE_PERMISSION_FAULT:
						/*0xC8 means PERMISSION FAULT */
						status = ((1 << 7) | (1 << 6) |
								(1 << 3));
						break;

					case KBASE_TRANSTAB_BUS_FAULT:
						/* 0xDm means TRANSLATION
						 * TABLE BUS FAULT (m is
						 * mmu_table_level)
						 */
						status = ((1 << 7) | (1 << 6) |
								(1 << 4) |
						hw_error_status.mmu_table_level
									);
						break;

					case KBASE_ACCESS_FLAG:
						/* 0xD8 means ACCESS FLAG */
						status = ((1 << 7) | (1 << 6) |
							(1 << 4) | (1 << 3));
						break;

					default:
						model_error_log(KBASE_CORE,
						"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
						break;
					}
					hw_error_status.as_faultstatus[i] =
									status;
				}

				if (hw_error_status.errors_mask &
						KBASE_TRANSTAB_BUS_FAULT)
					hw_error_status.mmu_irq_rawstat |=
						1 << (16 + i); /* bus error */
				else
					hw_error_status.mmu_irq_rawstat |=
						1 << i; /* page fault */
			}
		}
	}			/*end of mmu register statuses */
	if (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
		if (hw_error_status.gpu_fault_status) {
			/* not the first GPU error reported */
			hw_error_status.gpu_error_irq |= (1 << 7);
		} else {
			hw_error_status.gpu_error_irq |= 1;
			switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
			case KBASE_DELAYED_BUS_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7);
				break;

			case KBASE_SHAREABILITY_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7) |
								(1 << 3);
				break;

			default:
				model_error_log(KBASE_CORE,
				"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
				break;
			}
		}
	}
	hw_error_status.errors_mask = 0;	/*clear error mask */
}

#if !MALI_USE_CSF
static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
	int i;

	lockdep_assert_held(&hw_error_status.access_lock);
	pr_debug("%s", "Updating the JS_ACTIVE register");

	for (i = 0; i < NUM_SLOTS; i++) {
		int slot_active = dummy->slots[i].job_active;
		int next_busy = dummy->slots[i].job_queued;

		if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) {
			/* clear the bits we're updating */
			dummy->job_irq_js_state &= ~((1 << (16 + i)) |
								(1 << i));
			if (hw_error_status.js_status[i]) {
				dummy->job_irq_js_state |= next_busy <<
								(i + 16);
				if (mask & (1 << (i + 16))) {
					/* clear job slot status */
					hw_error_status.js_status[i] = 0;
					/* continue execution of jobchain */
					dummy->slots[i].job_active =
						dummy->slots[i].job_queued;
				}
			} else {
				/* set bits if needed */
				dummy->job_irq_js_state |= ((slot_active << i) |
						(next_busy << (i + 16)));
			}
		}
	}
	pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state);
}
#endif /* !MALI_USE_CSF */

/**
 * find_control_reg_values() - Look up constant control register values.
 * @gpu:	GPU name
 *
 * Look up the GPU name to find the correct set of control register values for
 * that GPU. If not found, warn and use the first values in the array.
 *
 * Return: Pointer to control register values for that GPU.
 */
static const struct control_reg_values_t *find_control_reg_values(const char *gpu)
{
	size_t i;
	const struct control_reg_values_t *ret = NULL;

	for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
		const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];

		if (!strcmp(fcrv->name, gpu)) {
			ret = fcrv;
			pr_debug("Found control register values for %s\n", gpu);
			break;
		}
	}

	if (!ret) {
		ret = &all_control_reg_values[0];
		pr_warn("Couldn't find control register values for GPU %s; using default %s\n",
			gpu, ret->name);
	}

	return ret;
}

void *midgard_model_create(const void *config)
{
	struct dummy_model_t *dummy = NULL;

	spin_lock_init(&hw_error_status.access_lock);
	spin_lock_init(&performance_counters.access_lock);

	dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);

	if (dummy) {
		dummy->job_irq_js_state = 0;
		init_register_statuses(dummy);
		dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
	}
	return dummy;
}

void midgard_model_destroy(void *h)
{
	kfree((void *)h);
}

static void midgard_model_get_outputs(void *h)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;

	lockdep_assert_held(&hw_error_status.access_lock);

	if (hw_error_status.job_irq_status)
		gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);

	if ((dummy->power_changed && dummy->power_changed_mask) ||
	    (dummy->reset_completed & dummy->reset_completed_mask) ||
	    hw_error_status.gpu_error_irq ||
#if !MALI_USE_CSF
	    dummy->prfcnt_sample_completed ||
#endif
	    (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
		gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);

	if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
		gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
}

static void midgard_model_update(void *h)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
	int i;

	lockdep_assert_held(&hw_error_status.access_lock);

	for (i = 0; i < NUM_SLOTS; i++) {
		if (!dummy->slots[i].job_active)
			continue;

		if (dummy->slots[i].job_disabled) {
			update_register_statuses(dummy, i);
			continue;
		}

		/* If there are any pending interrupts that have not
		 * been cleared we cannot run the job in the next register
		 * as we will overwrite the register status of the job in
		 * the head registers - which has not yet been read
		 */
		if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) ||
		   (hw_error_status.job_irq_rawstat & (1 << i))) {
			continue;
		}

		/* this job is done, assert IRQ lines */
		signal_int(dummy, i);
#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT
		midgard_set_error(i);
#endif				/* CONFIG_MALI_BIFROST_ERROR_INJECT */
		update_register_statuses(dummy, i);
		/*if this job slot returned failures we cannot use it */
		if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) {
			dummy->slots[i].job_active = 0;
			continue;
		}
		/*process next job */
		dummy->slots[i].job_active = dummy->slots[i].job_queued;
		dummy->slots[i].job_queued = 0;
		if (dummy->slots[i].job_active) {
			if (hw_error_status.job_irq_rawstat & (1 << (i + 16)))
				model_error_log(KBASE_CORE,
				"\natom %lld running a job on a dirty slot",
						hw_error_status.current_jc);
		}
	}
}

static void invalidate_active_jobs(struct dummy_model_t *dummy)
{
	int i;

	lockdep_assert_held(&hw_error_status.access_lock);

	for (i = 0; i < NUM_SLOTS; i++) {
		if (dummy->slots[i].job_active) {
			hw_error_status.job_irq_rawstat |= (1 << (16 + i));

			hw_error_status.js_status[i] = 0x7f; /* UNKNOWN */
		}
	}
}

u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	unsigned long flags;
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;

	spin_lock_irqsave(&hw_error_status.access_lock, flags);

#if !MALI_USE_CSF
	if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
			(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		int slot_idx = (addr >> 7) & 0xf;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
			hw_error_status.current_jc &=
						~((u64) (0xFFFFFFFF));
			hw_error_status.current_jc |= (u64) value;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) {
			hw_error_status.current_jc &= (u64) 0xFFFFFFFF;
			hw_error_status.current_jc |=
						((u64) value) << 32;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) &&
								value == 1) {
			pr_debug("%s", "start detected");
			KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active ||
					!dummy->slots[slot_idx].job_queued);
			if ((dummy->slots[slot_idx].job_active) ||
					(hw_error_status.job_irq_rawstat &
						(1 << (slot_idx + 16)))) {
				pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending  ~~~~~~~~~"
									);
				dummy->slots[slot_idx].job_queued = 1;
			} else {
				dummy->slots[slot_idx].job_active = 1;
			}
		}

		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value ==
									0)
			dummy->slots[slot_idx].job_queued = 0;

		if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) &&
				(value == JS_COMMAND_SOFT_STOP ||
					value == JS_COMMAND_HARD_STOP)) {
			/*dummy->slots[slot_idx].job_active = 0; */
			hw_error_status.current_job_slot = slot_idx;
			if (value == JS_COMMAND_SOFT_STOP) {
				hw_error_status.errors_mask = KBASE_JOB_STOPPED;
			} else { /* value == JS_COMMAND_HARD_STOP */

				if (dummy->slots[slot_idx].job_disabled != 0) {
					pr_debug("enabling slot after HARD_STOP"
									);
					dummy->slots[slot_idx].job_disabled = 0;
				}
				hw_error_status.errors_mask =
							KBASE_JOB_TERMINATED;
			}
		}
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		int i;

		for (i = 0; i < NUM_SLOTS; i++) {
			if (value & ((1 << i) | (1 << (i + 16))))
				dummy->slots[i].job_complete_irq_asserted = 0;
			/* hw_error_status.js_status[i] is cleared in
			 * update_job_irq_js_state
			 */
		}
		pr_debug("%s", "job irq cleared");
		update_job_irq_js_state(dummy, value);
		/*remove error condition for JOB */
		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		for (i = 0; i < NUM_SLOTS; i++)
			dummy->slots[i].job_irq_mask = (value >> i) & 0x01;
		pr_debug("job irq mask to value %x", value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#else /* !MALI_USE_CSF */
	if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		pr_debug("%s", "job irq cleared");

		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#endif /* !MALI_USE_CSF */
		pr_debug("GPU_IRQ_MASK set to 0x%x", value);
		dummy->reset_completed_mask = (value >> 8) & 0x01;
		dummy->power_changed_mask = (value >> 9) & 0x03;
		dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
	} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
		dummy->coherency_enable = value;
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
		if (value & (1 << 8)) {
			pr_debug("%s", "gpu RESET_COMPLETED irq cleared");
			dummy->reset_completed = 0;
		}
		if (value & (3 << 9))
			dummy->power_changed = 0;

		if (value & (1 << 17))
			dummy->clean_caches_completed = false;
#if !MALI_USE_CSF
		if (value & PRFCNT_SAMPLE_COMPLETED)
			dummy->prfcnt_sample_completed = 0;
#endif /* !MALI_USE_CSF */
		/*update error status */
		hw_error_status.gpu_error_irq &= ~(value);
	} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
		switch (value) {
		case GPU_COMMAND_SOFT_RESET:
		case GPU_COMMAND_HARD_RESET:
			pr_debug("gpu reset (%d) requested", value);
			/* no more fault status */
			hw_error_status.gpu_fault_status = 0;
			/* completed reset instantly */
			dummy->reset_completed = 1;
			break;
#if MALI_USE_CSF
		case GPU_COMMAND_CACHE_CLN_INV_L2:
		case GPU_COMMAND_CACHE_CLN_INV_L2_LSC:
		case GPU_COMMAND_CACHE_CLN_INV_FULL:
#else
		case GPU_COMMAND_CLEAN_CACHES:
		case GPU_COMMAND_CLEAN_INV_CACHES:
#endif
			pr_debug("clean caches requested");
			dummy->clean_caches_completed = true;
			break;
#if !MALI_USE_CSF
		case GPU_COMMAND_PRFCNT_SAMPLE:
			midgard_model_dump_prfcnt();
			dummy->prfcnt_sample_completed = 1;
			break;
#endif /* !MALI_USE_CSF */
		default:
			break;
		}
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		dummy->l2_config = value;
	}
#if MALI_USE_CSF
	else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
			 addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
						(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
		if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
			hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
	} else if (addr == IPA_CONTROL_REG(COMMAND)) {
		pr_debug("Received IPA_CONTROL command");
	} else if (addr == IPA_CONTROL_REG(TIMER)) {
		ipa_control_timer_enabled = value ? true : false;
	} else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) &&
		   (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) {
		enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)(
			(addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3);
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7);

		if (is_low_word) {
			ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX;
			ipa_ctl_select_config[core_type] |= value;
		} else {
			ipa_ctl_select_config[core_type] &= U32_MAX;
			ipa_ctl_select_config[core_type] |= ((u64)value << 32);
		}
	}
#endif
	else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		hw_error_status.mmu_irq_mask = value;
	} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
		hw_error_status.mmu_irq_rawstat &= (~value);
	} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
			(addr <= MMU_AS_REG(15, AS_STATUS))) {
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
									>> 6;

		switch (addr & 0x3F) {
		case AS_COMMAND:
			switch (value) {
			case AS_COMMAND_NOP:
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			case AS_COMMAND_UPDATE:
				hw_error_status.as_command[mem_addr_space] =
									value;
				if ((hw_error_status.as_faultstatus[
								mem_addr_space])
					&& ((hw_error_status.as_transtab[
						mem_addr_space] & 0x3) != 0)) {
					model_error_log(KBASE_CORE,
					"\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"
									);
				} else if ((hw_error_status.as_faultstatus[
								mem_addr_space])
					&& ((hw_error_status.as_transtab[
						mem_addr_space] & 0x3) == 0)) {

					/*invalidate all active jobs */
					invalidate_active_jobs(dummy);
					/* error handled */
					hw_error_status.as_faultstatus[
							mem_addr_space] = 0;
				}
				break;

			case AS_COMMAND_LOCK:
			case AS_COMMAND_UNLOCK:
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			case AS_COMMAND_FLUSH_PT:
			case AS_COMMAND_FLUSH_MEM:
				if (hw_error_status.as_command[mem_addr_space]
							!= AS_COMMAND_LOCK)
					model_error_log(KBASE_CORE,
						"\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"
									);
				else /* error handled if any */
					hw_error_status.as_faultstatus[
							mem_addr_space] = 0;
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			default:
				model_error_log(KBASE_CORE,
				"\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n",
									value);
				break;
			}
			break;

		case AS_TRANSTAB_LO:
			hw_error_status.as_transtab[mem_addr_space] &=
						~((u64) (0xffffffff));
			hw_error_status.as_transtab[mem_addr_space] |=
						(u64) value;
			break;

		case AS_TRANSTAB_HI:
			hw_error_status.as_transtab[mem_addr_space] &=
						(u64) 0xffffffff;
			hw_error_status.as_transtab[mem_addr_space] |=
						((u64) value) << 32;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Writes ignored */
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n",
						mem_addr_space, addr, value);
			break;
		}
	} else {
		switch (addr) {
#if !MALI_USE_CSF
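		/* PRFCNT_BASE is a 64-bit address written as two 32-bit
		 * halves; the dummy model then reinterprets it as a CPU
		 * pointer to the dump buffer.
		 */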
		case PRFCNT_BASE_LO:
			performance_counters.prfcnt_base =
				HI_MASK(performance_counters.prfcnt_base) | value;
			performance_counters.prfcnt_base_cpu =
				(u32 *)(uintptr_t)performance_counters.prfcnt_base;
			break;
		case PRFCNT_BASE_HI:
			performance_counters.prfcnt_base =
				LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32);
			performance_counters.prfcnt_base_cpu =
				(u32 *)(uintptr_t)performance_counters.prfcnt_base;
			break;
		case PRFCNT_JM_EN:
			performance_counters.prfcnt_en.fe = value;
			break;
		case PRFCNT_SHADER_EN:
			performance_counters.prfcnt_en.shader = value;
			break;
		case PRFCNT_TILER_EN:
			performance_counters.prfcnt_en.tiler = value;
			break;
		case PRFCNT_MMU_L2_EN:
			performance_counters.prfcnt_en.l2 = value;
			break;
#endif /* !MALI_USE_CSF */
		case TILER_PWRON_LO:
			dummy->power_on |= (value & 1) << 1;
			/* Also ensure L2 is powered on */
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case SHADER_PWRON_LO:
			dummy->power_on |= (value & 0xF) << 2;
			dummy->power_changed = 1;
			break;
		case L2_PWRON_LO:
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case STACK_PWRON_LO:
			dummy->stack_power_on_lo |= value;
			dummy->power_changed = 1;
			break;
		case TILER_PWROFF_LO:
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case SHADER_PWROFF_LO:
			dummy->power_on &= ~((value & 0xF) << 2);
			dummy->power_changed = 1;
			break;
		case L2_PWROFF_LO:
			dummy->power_on &= ~(value & 1);
			/* Also ensure tiler is powered off */
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case STACK_PWROFF_LO:
			dummy->stack_power_on_lo &= ~value;
			dummy->power_changed = 1;
			break;

		case TILER_PWROFF_HI:
		case SHADER_PWROFF_HI:
		case L2_PWROFF_HI:
		case PWR_KEY:
		case PWR_OVERRIDE0:
#if !MALI_USE_CSF
		case JM_CONFIG:
		case PRFCNT_CONFIG:
#else /* !MALI_USE_CSF */
		case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
		case SHADER_CONFIG:
		case TILER_CONFIG:
		case L2_MMU_CONFIG:
			/* Writes ignored */
			break;
		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported register 0x%x value 0x%x\n",
								addr, value);
			break;
		}
	}

	midgard_model_update(dummy);
	midgard_model_get_outputs(dummy);
	spin_unlock_irqrestore(&hw_error_status.access_lock, flags);

	return 1;
}

u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
{
	unsigned long flags;
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;

	spin_lock_irqsave(&hw_error_status.access_lock, flags);

	*value = 0;		/* 0 by default */
#if !MALI_USE_CSF
	if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
		pr_debug("%s", "JS_ACTIVE being read");

		*value = dummy->job_irq_js_state;
	} else if (addr == GPU_CONTROL_REG(GPU_ID)) {
#else /* !MALI_USE_CSF */
	if (addr == GPU_CONTROL_REG(GPU_ID)) {
#endif /* !MALI_USE_CSF */

		*value = dummy->control_reg_values->gpu_id;
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
		*value = hw_error_status.job_irq_rawstat;
		pr_debug("%s", "JS_IRQ_RAWSTAT being read");
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) {
		*value = hw_error_status.job_irq_status;
		pr_debug("JS_IRQ_STATUS being read %x", *value);
	}
#if !MALI_USE_CSF
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		*value = 0;
		for (i = 0; i < NUM_SLOTS; i++)
			*value |= dummy->slots[i].job_irq_mask << i;
		pr_debug("JS_IRQ_MASK being read %x", *value);
	}
#else /* !MALI_USE_CSF */
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK))
		; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
#endif /* !MALI_USE_CSF */
	else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
		*value = (dummy->reset_completed_mask << 8) |
			 ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
			 (dummy->power_changed_mask << 9) | (1 << 7) | 1;
		pr_debug("GPU_IRQ_MASK read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
		*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
			 (dummy->reset_completed << 8) |
#if !MALI_USE_CSF
			 (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
#endif /* !MALI_USE_CSF */
			 ((dummy->clean_caches_completed ? 1u : 0u) << 17) |
			 hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
		*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
			 ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
			 ((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
#if !MALI_USE_CSF
			 (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
#endif /* !MALI_USE_CSF */
			 (((dummy->clean_caches_completed &&
			    dummy->clean_caches_completed_irq_enabled) ?
				   1u :
				   0u)
			  << 17) |
			 hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_STAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
		*value = 0;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) {
		*value = 0;
#endif
	} else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) {
		*value = hw_error_status.gpu_fault_status;
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		*value = dummy->l2_config;
	} else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
				(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
		switch (addr) {
		case GPU_CONTROL_REG(SHADER_PRESENT_LO):
		case GPU_CONTROL_REG(SHADER_PRESENT_HI):
		case GPU_CONTROL_REG(TILER_PRESENT_LO):
		case GPU_CONTROL_REG(TILER_PRESENT_HI):
		case GPU_CONTROL_REG(L2_PRESENT_LO):
		case GPU_CONTROL_REG(L2_PRESENT_HI):
		case GPU_CONTROL_REG(STACK_PRESENT_LO):
		case GPU_CONTROL_REG(STACK_PRESENT_HI):
			*value = get_implementation_register(addr);
			break;
		case GPU_CONTROL_REG(SHADER_READY_LO):
			*value = (dummy->power_on >> 0x02) &
			get_implementation_register(
				GPU_CONTROL_REG(SHADER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(TILER_READY_LO):
			*value = (dummy->power_on >> 0x01) &
				 get_implementation_register(
				GPU_CONTROL_REG(TILER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(L2_READY_LO):
			*value = dummy->power_on &
				 get_implementation_register(
				GPU_CONTROL_REG(L2_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(STACK_READY_LO):
			*value = dummy->stack_power_on_lo &
				 get_implementation_register(
				GPU_CONTROL_REG(STACK_PRESENT_LO));
			break;

		case GPU_CONTROL_REG(SHADER_READY_HI):
		case GPU_CONTROL_REG(TILER_READY_HI):
		case GPU_CONTROL_REG(L2_READY_HI):
		case GPU_CONTROL_REG(STACK_READY_HI):
			*value = 0;
			break;

		case GPU_CONTROL_REG(SHADER_PWRTRANS_LO):
		case GPU_CONTROL_REG(SHADER_PWRTRANS_HI):
		case GPU_CONTROL_REG(TILER_PWRTRANS_LO):
		case GPU_CONTROL_REG(TILER_PWRTRANS_HI):
		case GPU_CONTROL_REG(L2_PWRTRANS_LO):
		case GPU_CONTROL_REG(L2_PWRTRANS_HI):
		case GPU_CONTROL_REG(STACK_PWRTRANS_LO):
		case GPU_CONTROL_REG(STACK_PWRTRANS_HI):
			*value = 0;
			break;

		case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(L2_PWRACTIVE_LO):
		case GPU_CONTROL_REG(L2_PWRACTIVE_HI):
			*value = 0;
			break;

#if !MALI_USE_CSF
		case GPU_CONTROL_REG(JM_CONFIG):
#else /* !MALI_USE_CSF */
		case GPU_CONTROL_REG(CSF_CONFIG):
#endif /* !MALI_USE_CSF */

		case GPU_CONTROL_REG(SHADER_CONFIG):
		case GPU_CONTROL_REG(TILER_CONFIG):
		case GPU_CONTROL_REG(L2_MMU_CONFIG):
			*value = 0;
			break;

		case GPU_CONTROL_REG(COHERENCY_FEATURES):
			*value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */
			break;
		case GPU_CONTROL_REG(COHERENCY_ENABLE):
			*value = dummy->coherency_enable;
			break;

		case GPU_CONTROL_REG(THREAD_TLS_ALLOC):
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
					"Dummy model register access: Reading unknown control reg 0x%x\n",
									addr);
			break;
		}
#if !MALI_USE_CSF
	} else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
			(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		int slot_idx = (addr >> 7) & 0xf;
		int sub_reg = addr & 0x7F;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		switch (sub_reg) {
		case JS_HEAD_NEXT_LO:
			*value = (u32) ((hw_error_status.current_jc) &
								0xFFFFFFFF);
			break;
		case JS_HEAD_NEXT_HI:
			*value = (u32) (hw_error_status.current_jc >> 32);
			break;
		case JS_STATUS:
			if (hw_error_status.js_status[slot_idx])
				*value = hw_error_status.js_status[slot_idx];
			else /* 0x08 means active, 0x00 idle */
				*value = (dummy->slots[slot_idx].job_active)
									<< 3;
			break;
		case JS_COMMAND_NEXT:
			*value = dummy->slots[slot_idx].job_queued;
			break;

		/* The dummy model does not implement these registers, so
		 * avoid printing error messages.
		 */
		case JS_HEAD_HI:
		case JS_HEAD_LO:
		case JS_TAIL_HI:
		case JS_TAIL_LO:
		case JS_FLUSH_ID_NEXT:
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: unknown job slot reg 0x%02X being read\n",
								sub_reg);
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr == GPU_CONTROL_REG(AS_PRESENT)) {
		*value = dummy->control_reg_values->as_present;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(JS_PRESENT)) {
		*value = 0x7;
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) &&
				addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) {
		switch (addr) {
		case GPU_CONTROL_REG(TEXTURE_FEATURES_0):
			*value = 0xfffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_1):
			*value = 0xffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_2):
			*value = 0x9f81ffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_3):
			*value = 0;
			break;
		}
#if !MALI_USE_CSF
	} else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) &&
				addr <= GPU_CONTROL_REG(JS15_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(JS0_FEATURES):
			*value = 0x20e;
			break;

		case GPU_CONTROL_REG(JS1_FEATURES):
			*value = 0x1fe;
			break;

		case GPU_CONTROL_REG(JS2_FEATURES):
			*value = 0x7e;
			break;

		default:
			*value = 0;
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(L2_FEATURES)
				&& addr <= GPU_CONTROL_REG(MMU_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(L2_FEATURES):
			*value = 0x6100206;
			break;

		case GPU_CONTROL_REG(CORE_FEATURES):
			*value = dummy->control_reg_values->core_features;
			break;

		case GPU_CONTROL_REG(TILER_FEATURES):
			*value = dummy->control_reg_values->tiler_features;
			break;

		case GPU_CONTROL_REG(MEM_FEATURES):
			/* Bit 0: Core group is coherent */
			*value = 0x01;
			/* Bits 11:8: L2 slice count - 1 */
			*value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8;
			break;

		case GPU_CONTROL_REG(MMU_FEATURES):
			*value = dummy->control_reg_values->mmu_features;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS)
				&& addr <= GPU_CONTROL_REG(THREAD_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(THREAD_FEATURES):
			*value = dummy->control_reg_values->thread_features
					| (IMPLEMENTATION_MODEL << 30);
			break;
		case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE):
			*value = dummy->control_reg_values->thread_max_barrier_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE):
			*value = dummy->control_reg_values->thread_max_workgroup_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_THREADS):
			*value = dummy->control_reg_values->thread_max_threads;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
				&& addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
		*value = 0;
	} else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
				&& addr <= MMU_AS_REG(15, AS_STATUS)) {
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
									>> 6;

		switch (addr & 0x3F) {
		case AS_TRANSTAB_LO:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] &
								0xffffffff);
			break;

		case AS_TRANSTAB_HI:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] >>
									32);
			break;

		case AS_STATUS:
			*value = 0;
			break;

		case AS_FAULTSTATUS:
			if (mem_addr_space == hw_error_status.faulty_mmu_as)
				*value = hw_error_status.as_faultstatus[
						hw_error_status.faulty_mmu_as];
			else
				*value = 0;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Read ignored */
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
					"Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n",
							mem_addr_space, addr);
			*value = 0;
			break;
		}
	} else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		*value = hw_error_status.mmu_irq_mask;
	} else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
		*value = hw_error_status.mmu_irq_rawstat;
	} else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
		*value = hw_error_status.mmu_irq_mask &
						hw_error_status.mmu_irq_rawstat;
	}
#if MALI_USE_CSF
	else if (addr == IPA_CONTROL_REG(STATUS)) {
		*value = (ipa_control_timer_enabled << 31);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
						    counter_index, is_low_word);
	}
#endif
	else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
		*value = dummy->control_reg_values->gpu_features_lo;
	} else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) {
		*value = dummy->control_reg_values->gpu_features_hi;
	} else {
		model_error_log(KBASE_CORE,
			"Dummy model register access: Reading unsupported register 0x%x. Returning 0\n",
									addr);
		*value = 0;
	}

	spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
	CSTD_UNUSED(dummy);

	return 1;
}

static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
				     u32 usr_data_size, u32 core_count)
{
	u32 sample_size;
	u32 *usr_data = NULL;

	lockdep_assert_held(&performance_counters.access_lock);

	sample_size =
		core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);

	if ((usr_data_size >= usr_data_offset) &&
	    (sample_size <= usr_data_size - usr_data_offset))
		usr_data = usr_data_start + (usr_data_offset / sizeof(u32));

	if (!usr_data)
		model_error_log(KBASE_CORE, "Unable to set counter sample 1");
	else {
		u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
		u32 i;

		for (i = 0; i < loop_cnt; i++) {
			counters[i] = usr_data[i];
		}
	}

	return usr_data_offset + sample_size;
}

static u32 set_kernel_sample_core_type(u64 *counters,
	u64 *usr_data_start, u32 usr_data_offset,
	u32 usr_data_size, u32 core_count)
{
	u32 sample_size;
	u64 *usr_data = NULL;

	lockdep_assert_held(&performance_counters.access_lock);

	sample_size =
		core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);

	if ((usr_data_size >= usr_data_offset) &&
	    (sample_size <= usr_data_size - usr_data_offset))
		usr_data = usr_data_start + (usr_data_offset / sizeof(u64));

	if (!usr_data)
		model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1");
	else
		memcpy(counters, usr_data, sample_size);

	return usr_data_offset + sample_size;
}

/* Counter values injected through the ioctl are 32-bit */
int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size)
{
	unsigned long flags;
	u32 *user_data;
	u32 offset = 0;

	if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32))
		return -EINVAL;

	/* copy_from_user() might sleep, so it cannot be called while holding a
	 * spinlock. Allocate a temporary buffer for the user data and copy
	 * into it before taking the lock.
	 */
	user_data = kmalloc(size, GFP_KERNEL);
	if (!user_data)
		return -ENOMEM;

	if (copy_from_user(user_data, data, size)) {
		model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace");
		kfree(user_data);
		return -EINVAL;
	}

	spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
	offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset,
					   size, 1);
#else
	offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset,
					   size, 1);
#endif /* !MALI_USE_CSF */
	offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset,
					   size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
	offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset,
					   size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
	offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset,
					   size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
	spin_unlock_irqrestore(&performance_counters.access_lock, flags);

	kfree(user_data);
	return 0;
}

/* Counter values injected through kutf are 64-bit */
void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size)
{
	unsigned long flags;
	u32 offset = 0;

	spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
	offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size,
					     1);
#else
	offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size,
					     1);
#endif /* !MALI_USE_CSF */
	offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset,
					     size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
	offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size,
					     hweight64(performance_counters.l2_present));
	offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset,
					     size, hweight64(performance_counters.shader_present));
	spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);

void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
		u64 *l2_present, u64 *shader_present)
{
	if (shader_present)
		*shader_present = performance_counters.shader_present;
	if (l2_present)
		*l2_present = performance_counters.l2_present;
}
KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores);

void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
		u64 l2_present, u64 shader_present)
{
	if (WARN_ON(!l2_present || !shader_present
			|| hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS
			|| hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES))
		return;

	performance_counters.l2_present = l2_present;
	performance_counters.shader_present = shader_present;

	/* Update the GPU properties used by vinstr to calculate the counter
	 * dump buffer size.
	 */
	kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present);
	kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present;
	kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present);
	kbdev->gpu_props.curr_config.shader_present = shader_present;
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);

int gpu_model_control(void *model,
				struct kbase_model_control_params *params)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)model;
	int i;
	unsigned long flags;

	if (params->command == KBASE_MC_DISABLE_JOBS) {
		for (i = 0; i < NUM_SLOTS; i++)
			dummy->slots[i].job_disabled = params->value;
	} else {
		return -EINVAL;
	}

	spin_lock_irqsave(&hw_error_status.access_lock, flags);
	midgard_model_update(dummy);
	midgard_model_get_outputs(dummy);
	spin_unlock_irqrestore(&hw_error_status.access_lock, flags);

	return 0;
}
